# -*- coding: utf-8 -*-
import random
import scrapy
from esgcc.util.login import SeleniumLogin
from zc_core.util import file_reader
from scrapy import Request
from zc_core.spiders.base import BaseSpider
from esgcc.cert.cert_util import init_folder, check_pool_exist, copy_from_pool, check_image_exist, parse_cert
from esgcc.rules import *
from scrapy.exceptions import IgnoreRequest


class CertListSpider(BaseSpider):
    """Spider that requests the certificate image listing for every target SKU.

    SKU ids are read from ``doc/sku_list.txt``. SKUs for which
    ``check_pool_exist`` returns true are copied from the pool via
    ``copy_from_pool`` instead of being requested again; for all others one
    request to ``cert_url`` is issued and handled by :meth:`parse_cert`.
    """
    name = 'cert_list'
    custom_settings = {
        'CONCURRENT_REQUESTS': 3,
        'DOWNLOAD_DELAY': 0.3,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 3,
        'CONCURRENT_REQUESTS_PER_IP': 3,
    }

    # Frequently used URLs
    attr_url = 'http://b.esgcc.com.cn/products/getOneProdAttribute?prodid={}&targetId=p_con_attr'
    cert_url = 'http://b.esgcc.com.cn/showDetail/getQualityImages?productId={}&start=0&rows=10&targetId=p_con_qualified'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Load the target SKU list and prepare the folder for this batch.

        Args:
            batchNo: Batch identifier, forwarded unchanged to ``BaseSpider``.
            *args: Extra positional arguments forwarded to ``BaseSpider``.
            **kwargs: Extra keyword arguments forwarded to ``BaseSpider``.
        """
        super(CertListSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Target SKU ids
        self.target = file_reader.read_rows('doc/sku_list.txt')
        # Initialise the folder for this batch.
        # NOTE(review): self.batch_no is presumably set by BaseSpider.__init__ - confirm.
        init_folder(self.batch_no)

    def start_requests(self):
        """Yield one certificate-listing request per SKU that is not already pooled.

        Yields:
            scrapy.Request: Request to ``cert_url`` for a SKU, carrying the
            session cookies and ``reqType``/``batchNo``/``skuId`` in ``meta``.
        """
        # NOTE(review): the session cookies are hard-coded here; the disabled
        # alternative was ``SeleniumLogin().get_cookies()``. The emptiness
        # check below only matters once cookies are obtained dynamically again.
        cookies = {"__s_f_c_s_": "08A4F7F4B462071DD0A4CC0F995EF3E4", "__d_s_": "08A4F7F4B462071DD0A4CC0F995EF3E4",
                   "__t_c_k_": "5874b56602a345b1b61a5e5a24d0d668", "JSESSIONID": "2D7CF38F87331A488DE5CE5A650A963E"}
        if not cookies:
            self.logger.error('init cookie failed...')
            return
        self.logger.info('init cookie: %s', cookies)

        # The headers are identical for every request, so build them once
        # instead of once per SKU.
        headers = {
            'Host': 'b.esgcc.com.cn',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }

        # Randomise the request order (shuffles self.target in place).
        random.shuffle(self.target)
        for sku_id in self.target:
            # Already downloaded: reuse the pooled copy and skip the request.
            if check_pool_exist(sku_id):
                self.logger.info('池中存在: [%s]', sku_id)
                copy_from_pool(sku_id, self.batch_no)
                continue

            # Certificate listing request.
            # NOTE(review): 'reqType' is 'attr' although the URL is cert_url -
            # confirm whether downstream code relies on this value.
            # NOTE(review): self.error_back is not defined in this class;
            # presumably inherited from BaseSpider - confirm.
            yield Request(
                url=self.cert_url.format(sku_id),
                cookies=cookies,
                callback=self.parse_cert,
                errback=self.error_back,
                meta={
                    'reqType': 'attr',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                },
                headers=headers,
                priority=25,
            )
            # Runs when the generator is resumed, i.e. after the request above
            # has been handed to the caller.
            self.logger.info('采集: [%s]', sku_id)

    def parse_cert(self, response):
        """Turn a certificate-listing response into an item, if it has images.

        Args:
            response: Response for a request issued by :meth:`start_requests`.

        Yields:
            The item returned by the ``parse_cert`` helper, with the request
            cookies attached under ``'cookies'``, when it lists images that
            ``check_image_exist`` does not report as already present.
        """
        item = parse_cert(response)

        # Guard clause: the helper may return nothing, or an item whose image
        # lists are missing/empty. Truthiness covers None as well as empty
        # lists, so neither len(None) nor None.get(...) can be reached.
        if not item or not item.get('image_urls') or not item.get('image_paths'):
            sku_id = item.get('skuId') if item else None
            if sku_id is None:
                # Fall back to the SKU id attached to the originating request.
                sku_id = response.meta.get('skuId')
            self.logger.info('无资质: sku=%s', sku_id)
            return

        # Skip items whose images were already downloaded.
        if check_image_exist(item):
            self.logger.info('已下载<%s>', item.get('skuId'))
            return

        # Attach the request cookies to the item before yielding it.
        item['cookies'] = response.request.cookies
        self.logger.info('资质: sku=%s, file=%s', item.get('skuId'), len(item.get('image_urls')))
        yield item
